{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Working with Semantic Knowledge Graphs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "\n",
    "from pyspark.sql import SparkSession\n",
    "\n",
    "from aips import get_engine\n",
    "from aips import get_semantic_knowledge_graph as get_skg\n",
    "\n",
    "# Get (or create) the Spark session registered under the application name \"AIPS\".\n",
    "spark = SparkSession.builder.appName(\"AIPS\").getOrCreate()\n",
    "\n",
    "# Engine handle from the aips package; later cells use it to look up collections by name.\n",
    "engine = get_engine()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# %load -s generate_request_root,generate_facets,default_node_name,validate_skg_request_input,generate_request,transform_node,transform_response_facet,sort_by_relatedness_desc,traverse engine/solr/skg\n",
    "def generate_request_root():\n",
    "    \"\"\"Return a fresh, facet-less skeleton of an SKG search request.\n",
    "\n",
    "    The skeleton sets `limit` to 0 and defines the `fore` and `back`\n",
    "    parameters that the facets' `relatedness($fore,$back)` function refers to.\n",
    "    \"\"\"\n",
    "    params = {\"q\": \"*:*\",\n",
    "              \"fore\": \"{!${defType} v=$q}\",\n",
    "              \"back\": \"*:*\",\n",
    "              \"defType\": \"edismax\"}\n",
    "    return {\"limit\": 0, \"params\": params, \"facet\": {}}\n",
    "\n",
    "def generate_facets(name=None, values=None, field=None,\n",
    "                    min_occurrences=None, limit=None,\n",
    "                    min_popularity=None, default_operator=\"AND\"):\n",
    "    \"\"\"Build the facet definition(s) for a single SKG node.\n",
    "\n",
    "    With `values`, one \"query\" facet is returned per value; facet `i` reads its\n",
    "    value from the request parameter `{name}_{i}_query`. Without `values`, a\n",
    "    single \"terms\" facet over `field` is returned. Every facet is sorted by,\n",
    "    and computes, `relatedness($fore,$back)`.\n",
    "\n",
    "    :param str name: Node name used to build the `{name}_{i}_query` references.\n",
    "    :param list of str values: Values to create query facets for.\n",
    "    :param str field: Field the query facets search (`qf`) or the terms facet enumerates.\n",
    "    :param int min_occurrences: `mincount` of the terms facet; not applied to query facets.\n",
    "    :param int limit: Facet `limit`. Terms facets default to 10; query facets omit it unless given.\n",
    "    :param float min_popularity: `min_popularity` option of the relatedness function.\n",
    "    :param str default_operator: `q.op` of the query facets.\n",
    "    :return: A list of facet dicts that do not share any nested dict.\n",
    "    \"\"\"\n",
    "    def build_facet():\n",
    "        # Built from scratch on every call: a shallow `dict.copy()` of one base\n",
    "        # facet would share the nested \"sort\"/\"facet\" dicts, so attaching a\n",
    "        # sub-facet to one facet would silently change all of its siblings.\n",
    "        relatedness = {\"type\": \"func\",\n",
    "                       \"func\": \"relatedness($fore,$back)\"}\n",
    "        if min_popularity:\n",
    "            relatedness[\"min_popularity\"] = min_popularity\n",
    "        facet = {\"type\": \"query\" if values else \"terms\"}\n",
    "        if limit or not values:\n",
    "            facet[\"limit\"] = limit if limit else 10\n",
    "        facet[\"sort\"] = {\"relatedness\": \"desc\"}\n",
    "        facet[\"facet\"] = {\"relatedness\": relatedness}\n",
    "        if min_occurrences and not values:\n",
    "            facet[\"mincount\"] = min_occurrences\n",
    "        if field:\n",
    "            facet[\"field\"] = field\n",
    "        return facet\n",
    "\n",
    "    if not values:\n",
    "        return [build_facet()]\n",
    "    facets = []\n",
    "    for i, _ in enumerate(values):\n",
    "        facet = build_facet()\n",
    "        facet[\"query\"] = \"{\" + f'!edismax q.op={default_operator} qf={field} v=${name}_{i}_query' + \"}\"\n",
    "        facets.append(facet)\n",
    "    return facets\n",
    "\n",
    "def default_node_name(i, j):\n",
    "    \"\"\"Return the fallback name of an unnamed node: `f{i}`, followed by `_{j}` when `j` is truthy.\"\"\"\n",
    "    suffix = f\"_{j}\" if j else \"\"\n",
    "    return f\"f{i}{suffix}\"\n",
    "\n",
    "def validate_skg_request_input(multi_node):\n",
    "    \"\"\"Validate a multi-node, i.e. a single node dict or a list of node dicts.\n",
    "\n",
    "    :param multi_node: The node, or list of nodes, to check.\n",
    "    :raises ValueError: If a node has no `field`, or if two explicitly named\n",
    "        nodes in the same list share a name.\n",
    "    \"\"\"\n",
    "    if isinstance(multi_node, list):\n",
    "        # Validate eagerly: a bare `map(...)` is lazy and would never run.\n",
    "        for node in multi_node:\n",
    "            validate_skg_request_input(node)\n",
    "        # Unnamed nodes receive distinct default names in transform_request,\n",
    "        # so only explicitly given names can clash.\n",
    "        node_names = [node[\"name\"] for node in multi_node if \"name\" in node]\n",
    "        if len(node_names) != len(set(node_names)):\n",
    "            raise ValueError(\"Node names must be distinct on a given level.\")\n",
    "        # The \"field\" check below applies to a single node, not to the list itself.\n",
    "        return\n",
    "    if \"field\" not in multi_node:\n",
    "        raise ValueError(\"'field' must be provided\")\n",
    "\n",
    "def transform_request(*multi_nodes):\n",
    "    \"\"\"Generates a faceted Solr SKG request from a set of multi-nodes.\n",
    "       A multi-node can be a single node or a collection of nodes.\n",
    "       A node can contain the following params: `name`, `values`, `field`,\n",
    "       `min_occurrences`, `limit`, `min_popularity` and `default_operator`.\n",
    "       :param str name: An optional name of the node. If not provided a default will be assigned\n",
    "       :param list of str values: If empty or absent, a terms facet is used. Otherwise a query facet per value is used\n",
    "       :param str field: The field to query against or discover values from.\n",
    "       :param int min_occurrences: The mincount on the facet.\n",
    "       :param int limit: The limit on the facet.\n",
    "       :param float min_popularity: The min_popularity of the relatedness function.\n",
    "       :param str default_operator: The q.op of the query facets (defaults to AND).\n",
    "       Each subsequent node is applied as a nested facet to all parent facets.\n",
    "       The nodes passed in are not modified.\n",
    "       :raises ValueError: If a multi-node fails `validate_skg_request_input`.\"\"\"\n",
    "    for multi_node in multi_nodes:\n",
    "        validate_skg_request_input(multi_node)\n",
    "    request = generate_request_root()\n",
    "    parent_nodes = [request]\n",
    "    # Every loop below has its own index name; reusing `i`/`j` in the inner\n",
    "    # loops would corrupt the default names of later nodes in the same multi-node.\n",
    "    for level, multi_node in enumerate(multi_nodes):\n",
    "        if isinstance(multi_node, dict):\n",
    "            multi_node = [multi_node]\n",
    "        current_facets = []\n",
    "        for position, node in enumerate(multi_node):\n",
    "            # Work on a copy so the caller's node dict does not gain a \"name\" key.\n",
    "            node = dict(node)\n",
    "            if \"name\" not in node:\n",
    "                node[\"name\"] = default_node_name(level, position)\n",
    "            facets = generate_facets(**node)\n",
    "            current_facets.extend(facets)\n",
    "            for parent_node in parent_nodes:\n",
    "                for facet_index, facet in enumerate(facets):\n",
    "                    parent_node[\"facet\"][f'{node[\"name\"]}_{facet_index}'] = facet\n",
    "            # Each value is passed as a request parameter that its query facet dereferences.\n",
    "            for value_index, value in enumerate(node.get(\"values\") or []):\n",
    "                request[\"params\"][f'{node[\"name\"]}_{value_index}_query'] = value\n",
    "        parent_nodes = current_facets\n",
    "    return request\n",
    "\n",
    "def transform_node(node, response_params):\n",
    "    \"\"\"Convert one response facet or bucket into a value node.\n",
    "\n",
    "    The result always carries \"relatedness\" (0.0 unless the node's `count`\n",
    "    is positive) and carries \"traversals\" only when nested facets were found.\n",
    "    \"\"\"\n",
    "    if node[\"count\"] > 0:\n",
    "        score = node[\"relatedness\"][\"relatedness\"]\n",
    "    else:\n",
    "        score = 0.0\n",
    "    transformed = {\"relatedness\": score}\n",
    "    nested = transform_response_facet(node, response_params)\n",
    "    if nested:\n",
    "        transformed[\"traversals\"] = nested\n",
    "    return transformed\n",
    "\n",
    "def transform_response_facet(node, response_params):\n",
    "    \"\"\"Group the nested facets of `node` into a list of traversals, one per node name.\n",
    "\n",
    "    Facets with \"buckets\" contribute one value per bucket; any other facet\n",
    "    contributes a single value whose label is looked up in `response_params`\n",
    "    under `<facet key>_query`. Values are ordered by descending relatedness.\n",
    "    \"\"\"\n",
    "    reserved = (\"count\", \"relatedness\", \"val\")\n",
    "    grouped = {}\n",
    "    for facet_key, facet_data in node.items():\n",
    "        if facet_key in reserved:\n",
    "            continue\n",
    "        # Facet keys look like \"<node name>_<index>\"; strip the trailing index.\n",
    "        node_name = facet_key.removesuffix(\"_\" + facet_key.split(\"_\")[-1])\n",
    "        traversal = grouped.setdefault(node_name, {\"name\": node_name, \"values\": {}})\n",
    "        if \"buckets\" in facet_data:\n",
    "            traversal[\"values\"] = {bucket[\"val\"]: transform_node(bucket, response_params)\n",
    "                                   for bucket in facet_data[\"buckets\"]}\n",
    "        else:\n",
    "            value_label = response_params[f\"{facet_key}_query\"]\n",
    "            traversal[\"values\"][value_label] = transform_node(facet_data, response_params)\n",
    "    for traversal in grouped.values():\n",
    "        traversal[\"values\"] = sort_by_relatedness_desc(traversal[\"values\"])\n",
    "    return list(grouped.values())\n",
    "\n",
    "def sort_by_relatedness_desc(d):\n",
    "    \"\"\"Return a new dict with the items of `d` ordered from highest to lowest `relatedness`.\"\"\"\n",
    "    def relatedness_of(entry):\n",
    "        _, stats = entry\n",
    "        return stats[\"relatedness\"]\n",
    "\n",
    "    ranked = sorted(d.items(), key=relatedness_of, reverse=True)\n",
    "    return dict(ranked)\n",
    "\n",
    "def traverse(collection, *nodes):\n",
    "    \"\"\"Build the SKG request for `nodes`, run it with `collection.native_search`,\n",
    "    and return the transformed facets as {\"graph\": [...]}.\"\"\"\n",
    "    skg_request = transform_request(*nodes)\n",
    "    skg_response = collection.native_search(skg_request)\n",
    "    graph = transform_response_facet(skg_response[\"facets\"], skg_request[\"params\"])\n",
    "    return {\"graph\": graph}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 5.4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_graph(traversal, query):\n",
    "    \"\"\"Print `term  relatedness` for every term nested under the first-level value `query`.\"\"\"\n",
    "    first_level = traversal[\"graph\"][0][\"values\"][query]\n",
    "    related_terms = first_level[\"traversals\"][0][\"values\"]\n",
    "    for term, data in related_terms.items():\n",
    "        print(f'{term}  {data[\"relatedness\"]}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "advil  0.70986\n",
      "motrin  0.59897\n",
      "aleve  0.4662\n",
      "ibuprofen  0.38264\n",
      "alleve  0.36649\n",
      "tylenol  0.33048\n",
      "naproxen  0.31226\n",
      "acetaminophen  0.17706\n"
     ]
    }
   ],
   "source": [
    "# Semantic knowledge graph over the \"health\" collection.\n",
    "health_skg = get_skg(engine.get_collection(\"health\"))\n",
    "\n",
    "# Node 1 starts from the value \"advil\" in the `body` field. Node 2 lists no values,\n",
    "# so its terms are discovered from `body` (min_occurrences 2, at most 8 terms).\n",
    "nodes_to_traverse = [{\"field\": \"body\", \n",
    "                      \"values\": [\"advil\"]},\n",
    "                     {\"field\": \"body\",\n",
    "                      \"min_occurrences\": 2,\n",
    "                      \"limit\": 8}]\n",
    "\n",
    "traversal = health_skg.traverse(*nodes_to_traverse)\n",
    "# The key passed to print_graph must equal the first node's value.\n",
    "print_graph(traversal, \"advil\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{\n",
      "  \"limit\": 0,\n",
      "  \"params\": {\n",
      "    \"q\": \"*:*\",\n",
      "    \"fore\": \"{!${defType} v=$q}\",\n",
      "    \"back\": \"*:*\",\n",
      "    \"defType\": \"edismax\",\n",
      "    \"f0_0_query\": \"advil\"\n",
      "  },\n",
      "  \"facet\": {\n",
      "    \"f0_0\": {\n",
      "      \"type\": \"query\",\n",
      "      \"sort\": {\n",
      "        \"relatedness\": \"desc\"\n",
      "      },\n",
      "      \"facet\": {\n",
      "        \"relatedness\": {\n",
      "          \"type\": \"func\",\n",
      "          \"func\": \"relatedness($fore,$back)\"\n",
      "        },\n",
      "        \"f1_0\": {\n",
      "          \"type\": \"terms\",\n",
      "          \"limit\": 8,\n",
      "          \"sort\": {\n",
      "            \"relatedness\": \"desc\"\n",
      "          },\n",
      "          \"facet\": {\n",
      "            \"relatedness\": {\n",
      "              \"type\": \"func\",\n",
      "              \"func\": \"relatedness($fore,$back)\"\n",
      "            }\n",
      "          },\n",
      "          \"mincount\": 2,\n",
      "          \"field\": \"body\"\n",
      "        }\n",
      "      },\n",
      "      \"field\": \"body\",\n",
      "      \"query\": \"{!edismax q.op=AND qf=body v=$f0_0_query}\"\n",
      "    }\n",
      "  }\n",
      "}\n"
     ]
    }
   ],
   "source": [
    "# Print the request that `transform_request` builds from the same nodes as the traversal above.\n",
    "skg_search_request = health_skg.transform_request(*nodes_to_traverse)\n",
    "print(json.dumps(skg_search_request, indent=2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 5.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "vibranium  0.94237\n",
      "wakandan  0.8197\n",
      "adamantium  0.80724\n",
      "wakanda  0.79122\n",
      "alloy  0.75724\n",
      "maclain  0.75623\n",
      "klaw  0.75222\n",
      "america's  0.74002\n"
     ]
    }
   ],
   "source": [
    "stackexchange_skg = get_skg(engine.get_collection(\"stackexchange\"))\n",
    "        \n",
    "# `query` and `traversal` are reused by the query-augmentation cells below.\n",
    "query = \"vibranium\"\n",
    "# Start from the query term, then discover up to 8 `body` terms (min_occurrences 2).\n",
    "nodes_to_traverse = [{\"field\": \"body\", \"values\": [query]},\n",
    "                     {\"field\": \"body\", \"min_occurrences\": 2, \"limit\": 8}]\n",
    "\n",
    "traversal = stackexchange_skg.traverse(*nodes_to_traverse)\n",
    "\n",
    "print_graph(traversal, query)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 5.6"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Query Augmentation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Expanded Query:\n",
      "vibranium^5 vibranium^0.94237 wakandan^0.8197 adamantium^0.80724 wakanda^0.79122 alloy^0.75724 maclain^0.75623 klaw^0.75222 america's^0.74002 \n"
     ]
    }
   ],
   "source": [
    "# Boost every related term by its relatedness score; each entry keeps its trailing space.\n",
    "related_terms = traversal[\"graph\"][0][\"values\"][query][\"traversals\"][0][\"values\"]\n",
    "expansion = \"\".join(f'{term}^{stats[\"relatedness\"]} '\n",
    "                    for term, stats in related_terms.items())\n",
    "# The original query term is placed first with a fixed boost of 5.\n",
    "expanded_query = f\"{query}^5 {expansion}\"\n",
    "\n",
    "print(f\"Expanded Query:\\n{expanded_query}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 5.7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Simple Query Expansion:\n",
      "{'query': \"vibranium vibranium^0.94237 wakandan^0.8197 adamantium^0.80724 wakanda^0.79122 alloy^0.75724 maclain^0.75623 klaw^0.75222 america's^0.74002 \", 'query_fields': ['title', 'body'], 'min_match': '1'}\n",
      "\n",
      "Increased Precision, Reduced Recall Query:\n",
      "{'query': \"vibranium vibranium^0.94237 wakandan^0.8197 adamantium^0.80724 wakanda^0.79122 alloy^0.75724 maclain^0.75623 klaw^0.75222 america's^0.74002 \", 'query_fields': ['title', 'body'], 'min_match': '30%'}\n",
      "\n",
      "Increased Precision, No Reduction in Recall:\n",
      "{'query': \"vibranium AND (vibranium^0.94237 wakandan^0.8197 adamantium^0.80724 wakanda^0.79122 alloy^0.75724 maclain^0.75623 klaw^0.75222 america's^0.74002 )\", 'query_fields': ['title', 'body'], 'min_match': '2'}\n",
      "\n",
      "Slightly Increased Recall Query:\n",
      "{'query': \"vibranium vibranium^0.94237 wakandan^0.8197 adamantium^0.80724 wakanda^0.79122 alloy^0.75724 maclain^0.75623 klaw^0.75222 america's^0.74002 \", 'query_fields': ['title', 'body'], 'min_match': '2'}\n",
      "\n",
      "Same Results, Better Conceptual Ranking:\n",
      "{'query': 'vibranium', 'query_fields': ['title', 'body'], 'min_match': '2', 'query_boosts': \"vibranium^0.94237 wakandan^0.8197 adamantium^0.80724 wakanda^0.79122 alloy^0.75724 maclain^0.75623 klaw^0.75222 america's^0.74002 \"}\n"
     ]
    }
   ],
   "source": [
    "def generate_request(query, min_match=None, boost=None):\n",
    "    \"\"\"Assemble a search request for `query` over the title and body fields.\n",
    "\n",
    "    `min_match` and `boost` are added as \"min_match\" and \"query_boosts\"\n",
    "    only when they are truthy.\n",
    "    \"\"\"\n",
    "    optional = {\"min_match\": min_match, \"query_boosts\": boost}\n",
    "    request = {\"query\": query, \"query_fields\": [\"title\", \"body\"]}\n",
    "    request.update({key: value for key, value in optional.items() if value})\n",
    "    return request\n",
    "    \n",
    "# The same expanded terms combined with different matching requirements.\n",
    "expanded = f\"{query} {expansion}\"\n",
    "simple_expansion = generate_request(expanded, \"1\")\n",
    "increased_conceptual_precision = generate_request(expanded, \"30%\")\n",
    "increased_precision_same_recall = generate_request(f\"{query} AND ({expansion})\", \"2\")\n",
    "slightly_increased_recall = generate_request(expanded, \"2\")\n",
    "same_results_better_ranking = generate_request(query, \"2\", expansion)\n",
    "\n",
    "labeled_requests = {\n",
    "    \"Simple Query Expansion:\": simple_expansion,\n",
    "    \"Increased Precision, Reduced Recall Query:\": increased_conceptual_precision,\n",
    "    \"Increased Precision, No Reduction in Recall:\": increased_precision_same_recall,\n",
    "    \"Slightly Increased Recall Query:\": slightly_increased_recall,\n",
    "    \"Same Results, Better Conceptual Ranking:\": same_results_better_ranking,\n",
    "}\n",
    "for position, (label, labeled_request) in enumerate(labeled_requests.items()):\n",
    "    # A blank line separates every entry from the one before it.\n",
    "    print(label if position == 0 else f\"\\n{label}\")\n",
    "    print(labeled_request)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Content-based Recommendations"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 5.8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def extract_phrases(document):\n",
    "    \"\"\"Stubbed entity extraction: `document` is ignored and a fixed list of phrases is returned.\"\"\"\n",
    "    stubbed_phrases = (\"this,doc,contains,the,words,luke,\"\n",
    "                       \"magneto,cyclops,darth vader,princess leia,\"\n",
    "                       \"wolverine,apple,banana,galaxy,force,\"\n",
    "                       \"blaster,and,chloe\")\n",
    "    return stubbed_phrases.split(\",\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "luke  0.75212\n",
      "force  0.73248\n",
      "darth vader  0.69378\n",
      "galaxy  0.58693\n",
      "princess leia  0.50491\n",
      "blaster  0.47143\n",
      "this  0.19193\n",
      "the  0.17519\n",
      "words  0.10144\n",
      "and  0.09709\n",
      "contains  0.03434\n",
      "doc  0.00885\n",
      "chloe  0.0\n",
      "cyclops  -0.01825\n",
      "magneto  -0.02175\n",
      "banana  -0.0319\n",
      "wolverine  -0.03362\n",
      "apple  -0.03894\n"
     ]
    }
   ],
   "source": [
    "stackexchange_skg = get_skg(engine.get_collection(\"stackexchange\"))\n",
    "\n",
    "# `classification` is the first-level value; the document's phrases are scored against it.\n",
    "classification = \"star wars\"\n",
    "document = \"\"\"this doc contains the words luke, magneto, cyclops,\n",
    "              darth vader, princess leia, wolverine, apple, banana,\n",
    "              galaxy, force, blaster, and chloe.\"\"\"\n",
    "# extract_phrases is a stub that returns a fixed phrase list matching this document.\n",
    "parsed_document = extract_phrases(document)\n",
    "# Both nodes list explicit values, so no new terms are discovered: each phrase\n",
    "# of the document is scored under the classification.\n",
    "nodes_to_traverse = [{\"field\": \"body\", \"values\": [classification]},\n",
    "                     {\"field\": \"body\", \"values\": parsed_document}]\n",
    "\n",
    "traversal = stackexchange_skg.traverse(*nodes_to_traverse)\n",
    "\n",
    "print_graph(traversal, classification)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 5.9"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Expanded Query:\n",
      "\"luke\"^0.75212 \"force\"^0.73248 \"darth vader\"^0.69378 \"galaxy\"^0.58693 \"princess leia\"^0.50491 \"blaster\"^0.47143\n"
     ]
    }
   ],
   "source": [
    "def get_scored_terms(traversal, classification=\"star wars\"):\n",
    "    \"\"\"Map each term nested under `classification` to its relatedness score.\n",
    "\n",
    "    :param dict traversal: A traversal result of the form {\"graph\": [...]}.\n",
    "    :param str classification: First-level value whose nested terms are read.\n",
    "        Defaults to \"star wars\", the value that was previously hard-coded.\n",
    "    :return: A {term: relatedness} dict in the traversal's order.\n",
    "    :raises KeyError: If `classification` is not a first-level value of the traversal.\n",
    "    \"\"\"\n",
    "    classified = traversal[\"graph\"][0][\"values\"][classification]\n",
    "    scored_values = classified[\"traversals\"][0][\"values\"]\n",
    "    return {term: data[\"relatedness\"] for term, data in scored_values.items()}\n",
    "\n",
    "# Keep only the terms scoring above 0.25 and boost each quoted term by its score.\n",
    "scored_terms = get_scored_terms(traversal)\n",
    "boosted_terms = [f'\"{term}\"^{score}'\n",
    "                 for term, score in scored_terms.items()\n",
    "                 if score > 0.25]\n",
    "rec_query = \" \".join(boosted_terms)\n",
    "\n",
    "print(f\"Expanded Query:\\n{rec_query}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 5.10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[\n",
      "  {\n",
      "    \"title\": \"At the end of Return of the Jedi, did Darth Vader learn that Princess Leia was his daughter?\"\n",
      "  },\n",
      "  {\n",
      "    \"title\": \"Did Luke know the &quot;Chosen One&quot; prophecy?\"\n",
      "  },\n",
      "  {\n",
      "    \"title\": \"Was Darth Vader at his strongest during Episode III?\"\n",
      "  },\n",
      "  {\n",
      "    \"title\": \"Why couldn't Snoke or Kylo Ren trace Luke using the Force?\"\n",
      "  },\n",
      "  {\n",
      "    \"title\": \"Does Kylo Ren know that Darth Vader reconciled with Luke?\"\n",
      "  }\n",
      "]\n"
     ]
    }
   ],
   "source": [
    "stackexchange_collection = engine.get_collection(\"stackexchange\")\n",
    "\n",
    "# Search title and body with the boosted recommendation query built above,\n",
    "# returning only the title of at most 5 documents.\n",
    "# NOTE(review): the (\"title\", \"*\") filter presumably restricts results to\n",
    "# documents that have a title - confirm against the engine's search API.\n",
    "request = {\"query\": rec_query,\n",
    "           \"query_fields\": [\"title\", \"body\"],\n",
    "           \"return_fields\": [\"title\"],\n",
    "           \"limit\": 5,\n",
    "           \"filters\": [(\"title\", \"*\")]}\n",
    "\n",
    "response = stackexchange_collection.search(**request)\n",
    "\n",
    "print(json.dumps(response[\"docs\"], indent=2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exploring Arbitrary Relationships"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Listing 5.11"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_graph(traversal, node1_query, node2_query):\n",
    "    \"\"\"Print `term  relatedness` for the terms found two levels down:\n",
    "    under the first-level value `node1_query`, then under `node2_query`.\"\"\"\n",
    "    first_hop = traversal[\"graph\"][0][\"values\"][node1_query]\n",
    "    second_hop = first_hop[\"traversals\"][0][\"values\"][node2_query]\n",
    "    discovered_terms = second_hop[\"traversals\"][0][\"values\"]\n",
    "    for term, data in discovered_terms.items():\n",
    "        print(f'{term}  {data[\"relatedness\"]}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "jean  0.84915\n",
      "grey  0.74742\n",
      "summers  0.61021\n",
      "cyclops  0.60693\n",
      "xavier  0.53004\n",
      "wolverine  0.48053\n",
      "mutant  0.46532\n",
      "x  0.45028\n",
      "mutants  0.42568\n",
      "magneto  0.42197\n"
     ]
    }
   ],
   "source": [
    "scifi_skg = get_skg(engine.get_collection(\"scifi\"))\n",
    "\n",
    "# Three-level traversal: starting entity -> relationship phrase -> discovered terms.\n",
    "starting_node = \"jean grey\"\n",
    "relationship = \"in love with\"\n",
    "# \"default_operator\": \"OR\" replaces the default \"AND\" for the relationship node\n",
    "# (it becomes the q.op of the query facet in generate_facets above).\n",
    "# The last node lists no values, so its terms are discovered (min_occurrences 25, limit 10).\n",
    "nodes_to_traverse = [{\"field\": \"body\", \"values\": [starting_node]},\n",
    "                     {\"field\": \"body\", \"values\": [relationship],\n",
    "                      \"default_operator\": \"OR\"},\n",
    "                     {\"field\": \"body\",\n",
    "                      \"min_occurrences\": 25, \"limit\": 10}]\n",
    "\n",
    "traversal = scifi_skg.traverse(*nodes_to_traverse)\n",
    "\n",
    "# Uses the three-argument print_graph defined in the previous cell.\n",
    "print_graph(traversal, starting_node, relationship)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Bonus Examples (not included in chapter)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_graph(traversal, query):\n",
    "    \"\"\"Print `term  relatedness` for every term nested under the first-level value `query`.\n",
    "\n",
    "    Re-defines the two-argument form, which the Listing 5.11 cell replaced\n",
    "    with a three-argument version.\n",
    "    \"\"\"\n",
    "    first_level = traversal[\"graph\"][0][\"values\"][query]\n",
    "    related_terms = first_level[\"traversals\"][0][\"values\"]\n",
    "    for term, data in related_terms.items():\n",
    "        print(f'{term}  {data[\"relatedness\"]}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "spark  0.80665\n",
      "hadoop  0.59424\n",
      "hive  0.52983\n",
      "kafka  0.51552\n",
      "impala  0.45309\n",
      "streamsets  0.39341\n",
      "scala  0.38564\n",
      "flume  0.38401\n",
      "attunity  0.37374\n",
      "mapreduce  0.36195\n"
     ]
    }
   ],
   "source": [
    "jobs_skg = get_skg(engine.get_collection(\"jobs\"))\n",
    "\n",
    "# The second node sets neither `limit` nor `min_occurrences`; generate_facets above\n",
    "# falls back to a limit of 10 for such a terms facet.\n",
    "nodes_to_traverse = [{\"field\": \"job_description\", \"values\": [\"spark\"]},\n",
    "                     {\"field\": \"job_description\"}]\n",
    "\n",
    "traversal = jobs_skg.traverse(*nodes_to_traverse)\n",
    "\n",
    "print_graph(traversal, \"spark\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "chef  0.80689\n",
      "puppet  0.59501\n",
      "ansible  0.52824\n",
      "terraform  0.3866\n",
      "jenkins  0.30455\n",
      "culinary  0.25935\n",
      "docker  0.25145\n",
      "cd  0.2434\n",
      "ci  0.23938\n",
      "ruby  0.20856\n"
     ]
    }
   ],
   "source": [
    "# Reuses `jobs_skg` from the previous cell. `min_popularity` is handed to the\n",
    "# relatedness function of the discovered-terms facet (see generate_facets above).\n",
    "nodes_to_traverse = [{\"field\": \"job_description\", \"values\": [\"chef\"]},\n",
    "                     {\"field\": \"job_description\", \"min_popularity\": 0.0005}]\n",
    "\n",
    "traversal = jobs_skg.traverse(*nodes_to_traverse)\n",
    "\n",
    "print_graph(traversal, \"chef\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Success!\n",
    "\n",
    "You've leveraged a semantic knowledge graph to find related terms for a query, performed query expansion based upon semantically-similar terms, explored multiple different ways to impact precision and recall of queries through integrating semantically-augmented queries, generated content-based recommendations leveraging a semantic knowledge graph, and explored arbitrary relationship types by traversing a semantic knowledge graph.\n",
    "\n",
    "Semantic knowledge graphs can be a powerful tool for understanding user intent and interpreting both queries and content based upon meaning instead of just text keywords.\n",
    "\n",
    "Up next: Chapter 6 - [Using Context to Learn Domain-specific Language](../ch06/1.skg-classification-disambiguation.ipynb)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "vscode": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
